*---------------------------------------------------------------------------------------------------------------------------------------
*** Setup
*---------------------------------------------------------------------------------------------------------------------------------------
* Reset the Stata session and drop any previously defined programs, so that
* text_replace (defined below) can be re-defined on every run
clear all
set more off
version 14
program drop _all

* Confirm that the global for the project root directory has been defined.
* NOTE: the matching check for the R executable global (RSCRIPT_PATH) is currently commented out.
assert !missing("$Wellness_WhatDoesWWDo")
* assert !missing("$RSCRIPT_PATH")

* All required add-ons are stored in the /packages and /auxiliary folders;
* both folders are pushed to the front of the ado search path
adopath ++ "$Wellness_WhatDoesWWDo/scripts/packages"
adopath ++ "$Wellness_WhatDoesWWDo/scripts/auxiliary"
mata: mata mlib index

* Redo Lasso controls, regressions, and Westfall-Young p-values?
*   1 = redo all
*   0 = redo only if results files do not exist
* This single flag is copied into the three step-specific redo flags in Step 0.a.
local redo_all 0

* Program to replace a text string in a tex file (in place)
*   Usage:  text_replace "tex_file" "from" "to"
*     1: path of the file to edit
*     2: pattern to search for (passed to the from() option of filefilter)
*     3: replacement text      (passed to the to() option of filefilter)
*   The filtered text is written to a tempfile and then copied over the original file.
*   The copy is retried once per second when it fails; after max_tries failed attempts
*   the program exits with the return code of the last copy instead of looping forever.
program define text_replace, nclass
	local tex_file `1'
	local from `2'
	local to `3'

	* Filter into a tempfile first: the edited text is then copied back over the input file
	tempfile t
	filefilter "`tex_file'" "`t'", from("`from'") to("`to'") replace

	* Copy back, retrying on failure (presumably the target can be temporarily locked).
	* The retry count is bounded so that a persistent failure cannot hang the batch.
	local max_tries 120
	local tries 1
	capture copy "`t'" "`tex_file'", replace
	local rc = _rc
	while `rc' > 0 & `tries' < `max_tries' {
		noisily di "...sleep..."
		sleep 1000
		capture copy "`t'" "`tex_file'", replace
		local rc = _rc
		local ++tries
	}

	* Surface a persistent failure to the caller
	if `rc' > 0 {
		noisily di as error "text_replace: could not overwrite `tex_file' after `tries' attempts"
		exit `rc'
	}
end

*---------------------------------------------------------------------------------------------------------------------------------------
*** Set Color Scheme
*---------------------------------------------------------------------------------------------------------------------------------------
* Each color is stored as a quoted string: either an RGB triple or a named gray shade
* (optionally with an intensity multiplier).  None of these locals is referenced in the
* part of the script up to the LaTeX tables; presumably they are used by later figure code.
local dkorange = `" "232 74 39" "'
local ltorange = `" "244 165 147" "'
local dkblue = `" "19 41 75" "'
local ltblue = `" "137 148 165" "'
local dkgray = `" "gs12" "'
local ltgray = `" "gs12*.5" "'
local vltgray = `" "gs15" "'

* Time frame of output variables
* (used as the name of the output sub-folders and as a suffix of the LaTeX file names)
local timeframe 0816_0717

*---------------------------------------------------------------------------------------------------------------------------------------
*** ITT/IV/OLS
*---------------------------------------------------------------------------------------------------------------------------------------
qui {
* Step 0.a.  Code chunk locals and options
if 1 {
	* Input data
	local wellness_analysis_dta "$Wellness_WhatDoesWWDo/data/proc/wellness_analysis.dta"

	* Output locations: each one is a sub-folder named after the output time frame
	local data_dir "$Wellness_WhatDoesWWDo/data/proc/`timeframe'"
	local results_intermediate_files "$Wellness_WhatDoesWWDo/results/intermediate_files/`timeframe'"
	local results_tables "$Wellness_WhatDoesWWDo/results/tables/`timeframe'"
	local results_figures "$Wellness_WhatDoesWWDo/results/figures/`timeframe'"

	* Create the folder tree, parents before children.
	* Errors from mkdir are captured, so folders that already exist are left untouched.
	foreach new_dir in                                                   ///
		"`data_dir'"                                                       ///
		"`data_dir'/lasso"                                                 ///
		"`results_intermediate_files'"                                     ///
		"`results_intermediate_files'/treat_effects"                       ///
		"`results_intermediate_files'/treat_effects/lasso"                 ///
		"`results_intermediate_files'/treat_effects/estimates"             ///
		"`results_intermediate_files'/treat_effects/wyoung"                ///
		"`results_intermediate_files'/winsorization"                       ///
		"`results_intermediate_files'/winsorization/estimates"             ///
		"`results_tables'"                                                 ///
		"`results_figures'" {
		cap mkdir "`new_dir'"
	}

	* Step-specific redo flags (1=Yes, 0=No), all inherited from the master flag redo_all:
	*   redo_lasso_selection   Recalculate Lasso controls?
	*   redo_regressions       Re-estimate ITT/IV/OLS regressions?
	*   redo_wyoung            Re-estimate Westfall-Young p-values?
	foreach step in lasso_selection regressions wyoung {
		local redo_`step' `redo_all'
	}
}

* Step 0.b.  Restrict wellness_analysis.dta to the treatment and control groups, and keep the result in a tempfile
if 1 {
	* Tempfile that every later step loads its variables from
	tempfile selection_data

	* Load only individuals with treat equal to 0 or 1; everyone else is dropped
	use if inlist(treat, 0, 1) using "`wellness_analysis_dta'", clear

	* The selected sample must contain exactly 4834 observations
	count
	assert r(N)==4834

	saveold `selection_data', replace
}

* Step 0.c.  Define outcomes & domains, treatment, instruments, Lasso controls, and regression weights
* All locals defined here are read by the estimation steps below:
*   d, instruments              treatment and instrument variable names
*   <domain>_<class>            outcome lists by domain and data source (admin / survey / index)
*   <domain>                    all outcomes of a domain
*   <domain>_0                  baseline admin variables offered to the Lasso for that domain
*   lasso_common_ctrls          controls offered to the Lasso for every outcome
*   aw_<outcome>                name of the weight variable for an outcome (empty = unweighted)
if 1 {
	* (Endogenous) Treatment Variable
	local d hra_c_nomiss

	* Instruments
	* local instruments treat StudyArm
	local instruments treat

	* Define lists of potential outcomes
	local spend_vars_admin    spend_0816_0717 spendRx_0816_0717 spendOff_0816_0717 spendHosp_0816_0717 nonzero_spend_0816_0717
	local spend_vars_survey   druguse_0717 physician_0717 hospital_0717
	local job_vars_admin      salaryRaise_0616_0717 promotion_0616_0717 titleChange_0616_0717 terminated_0717 sickleave_0816_0717
	local job_vars_survey     sickdays_0717 hrsworked50_0717 jobsatisf1_0717 jobsatisf2_0717 mgmtsafety_0717 happywork_0717 presenteeism_0717 productive_0717 promotion_0717 jobsearch1_0717 jobsearch2_0717
	local job_vars_index      prod_index_yr1
	local health_vars_admin   marathon_2017 gym_0816_0717
	local health_vars_survey  everscreen_0717 active_0717 active_try_0717 cursmk_0717 drink_0717 drinkhvy_0717 chronic_0717 health1_0717 health2_0717 problems_0717 energy_0717 ehealth_0717 overweight_0717 badhealth_0717 sedentary_0717

	* Quantity outcomes for Appendix
	local quantity_vars_admin anyspend_0816_0816 anyspend_0816_0916 anyspend_0816_1016 anyspend_0816_0117 anyspend_0816_0717 visitRx_0816_0717 visitOff_0816_0717 visitHosp_0816_0717

	* Domains of outcomes
	* (only job_vars has an index class; quantity_vars has admin outcomes only)
	local spend_vars "`spend_vars_admin' `spend_vars_survey'"
	local job_vars "`job_vars_admin' `job_vars_survey' `job_vars_index'"
	local health_vars "`health_vars_admin' `health_vars_survey'"
	local quantity_vars "`quantity_vars_admin'"

	* Double-Lasso: common control variables
	local svy_hvars "everscreen active active_try cursmk othersmk formsmk drink drinkhvy chronic health1 health2 problems energy ehealth overweight badhealth sedentary"
	local svy_uvars "druguse physician hospital"
	local svy_prodvars "sickdays hrsworked50 jobsatisf1 jobsatisf2 mgmtsafety"
	local base_strata_vars "male age50 age37_49 white salaryM1 salaryQ1 salaryQ2 salaryQ3 faculty AP"
	local lasso_common_ctrls "`svy_hvars' `svy_uvars' `svy_prodvars' `base_strata_vars'"

	* Double-Lasso: Baseline outcomes of admin vars, by domain
	local spend_vars_0  "spend_0715_0716 nonzero_spend_0715_0716 spendRx_0715_0716 spendOff_0715_0716 spendHosp_0715_0716"
	local job_vars_0    "salary_0616 sickleave_0815_0716"
	local health_vars_0 "marathon_2014 marathon_2015 marathon_2016 gym_0815_0716"
	* local quantity_vars_0 "nonzero_spend_0715_0716 visitRx_0715_0716 visitOff_0715_0716 visitHosp_0715_0716 visitOthr_0715_0716"
	local quantity_vars_0 "nonzero_spend_0715_0716 visitRx_0715_0716 visitOff_0715_0716 visitHosp_0715_0716"

	* Define weighting variable, if any, for each outcome
	*   strpos(...)==1 is true only when the outcome name STARTS WITH the given prefix, and the
	*   second condition requires the 0816_0717 period to appear somewhere in the name.
	*   Names that merely contain the prefix (nonzero_spend_..., anyspend_...) therefore fall
	*   through to the last branch, which leaves the weight macro empty (unweighted).
	foreach domain in spend_vars job_vars health_vars quantity_vars {
		foreach y in ``domain'' {
			if (strpos("`y'", "spend")==1) & (strpos("`y'", "0816_0717")>0)           local aw_`y' "covg_0816_0717"
			else if (strpos("`y'", "visit")==1) & (strpos("`y'", "0816_0717")>0)      local aw_`y' "covg_0816_0717"
			else if (strpos("`y'", "sickleave")==1) & (strpos("`y'", "0816_0717")>0)  local aw_`y' "sickdays_eligible_0816_0717"
			else                                                                      local aw_`y'
		}
	}
}

* Step 1.  Select controls that predict y or d, for both IV and OLS samples
*   For every outcome this step writes
*     - one data set with the variables and the sample flags (in_lasso_IV, in_lasso_OLS), and
*     - four do-files (IV/OLS x y/d) written by double_lasso.R; Steps 2 and 3 run them to
*       create the selected controls.
foreach domain in spend_vars job_vars health_vars quantity_vars {
foreach y in ``domain'' {
	* Notation
	*	y:   Outcome variable
	*	d:   (Endogenous) treatment variable
	*	aw:  Regression weights - leave empty for unweighted regression (aw=1)

	* Output data and results files
	* (outfiles holds the NAMES of the locals that contain the file paths)
	local lasso_data "`data_dir'/lasso/lasso_data_`y'.dta"
	local outfiles lasso_data
	foreach effect in IV OLS {
		local lasso_do_`effect'_y "`results_intermediate_files'/treat_effects/lasso/lasso_do_`effect'_y_`y'.do"
		local lasso_do_`effect'_d "`results_intermediate_files'/treat_effects/lasso/lasso_do_`effect'_d_`y'.do"
		local outfiles `outfiles' lasso_do_`effect'_y lasso_do_`effect'_d
	}

	* Skip loop iteration unless either (a) an output/results file does not exist, or (b) redo_lasso_selection flag set to 1
	* The loop walks through the output files and stops at the FIRST missing one, so that _rc is
	* nonzero afterwards whenever any file is missing.  The loop must be left with
	* "continue, break": a bare "break" is the interrupt-key prefix, not a loop exit, so the
	* loop would keep going and _rc would only describe the last file checked.
	capture tokenize `outfiles'
	while !missing("`*'") {
		capture confirm file "``1''"
		if _rc continue, break
		macro shift
	}
	if _rc==0 & `redo_lasso_selection'==0 {
		di "Using prior Lasso results for y = `y': all Lasso files exist, and redo_lasso_selection flag = `redo_lasso_selection'"
		continue
	}
	else {
		di "Calculating Lasso results for y = `y': _rc=`=_rc', redo_lasso_selection=`redo_lasso_selection'"
	}

	* Weights
	local aw `aw_`y''

	* Load data
	local keepvars AnalysisID `y' `d' `instruments' ``domain'_0' `aw' Strata `lasso_common_ctrls'
	use `keepvars' using "`selection_data'", clear

	* Add a weights variable = 1 for unweighted regressions
	if missing("`aw'") {
		gen aw = 1
		local aw aw
	}

	* Make sure no left-over R <-> Stata transfer files exist
	* (errors are captured: the files may legitimately not exist yet)
	cap rm "`lasso_data'"
	foreach effect in IV OLS {
		cap rm "`lasso_do_`effect'_y'"
		cap rm "`lasso_do_`effect'_d'"
	}

	* Save variables and observations to be used in the double-selection algorithm
	*   in_lasso_IV:   outcome, treatment and all candidate controls non-missing, and weight > 0
	*   in_lasso_OLS:  same, restricted to the treatment group
	* NOTE(review): a missing value compares greater than any number in Stata, so the weight
	* condition is also true when the weight itself is missing - confirm that this is intended.
	local ctrl_vars ``domain'_0' `lasso_common_ctrls'
	local lasso_varlist=subinstr("`y' `d' `ctrl_vars'", " ", ", ", .)
	gen byte in_lasso_IV = (!missing(`lasso_varlist') & `aw'>0)
	gen byte in_lasso_OLS = in_lasso_IV & (treat==1)
	saveold "`lasso_data'", replace

	* Determine and define variables selected by LASSO to predict outcome or treatment variables
	* The R script receives, in this order: id variable, sample flag, outcome, treatment,
	* candidate controls, weight variable, input data set, and the two do-files to write.
	foreach effect in IV OLS {
		di ""
		di "`effect' variable selection for `y'"
		di "`="_"*80'"
		rscript using "$Wellness_WhatDoesWWDo/scripts/auxiliary/double_lasso.R", args("AnalysisID" "in_lasso_`effect'" "`y'" "`d'" "`ctrl_vars'" "`aw'" "`lasso_data'" "`lasso_do_`effect'_y'" "`lasso_do_`effect'_d'")
	}
}
}

* Step 2.  Estimate ITT/IV/OLS regressions, saving regression results
*   Input:   the Lasso data set and the four Lasso do-files written in Step 1
*   Output:  per outcome, one estimates file per specification plus all_estimates_<outcome>.dta,
*            a table with one column per specification (itt_mean, itt_*, iv1_*, ols_*)
*   regsave is a user-written command; the notes on its behavior below are read off the
*   options used here and should be confirmed against its help file.
foreach domain in spend_vars job_vars health_vars quantity_vars {
foreach y in ``domain'' {
	* Notation
	*	y:   Outcome variable
	*	d:   (Endogenous) treatment variable
	*	aw:  Regression weights - leave empty for unweighted regression (aw=1)

	* Input data and results files
	local lasso_data "`data_dir'/lasso/lasso_data_`y'.dta"
	foreach effect in IV OLS {
		local lasso_do_`effect'_y "`results_intermediate_files'/treat_effects/lasso/lasso_do_`effect'_y_`y'.do"
		local lasso_do_`effect'_d "`results_intermediate_files'/treat_effects/lasso/lasso_do_`effect'_d_`y'.do"
	}

	* Output data and results files
	local regsave_file "`results_intermediate_files'/treat_effects/estimates/all_estimates_`y'.dta"

	* Skip loop iteration unless either (a) an output/results file does not exist, or (b) redo_regressions flag set to 1
	* (only the combined table is checked, not the individual estimates files)
	capture confirm file "`regsave_file'"
	if _rc==0 & `redo_regressions'==0 {
		di "Using prior regression results for y = `y': all estimation files exist, and redo_regressions flag = `redo_regressions'"
		continue
	}
	else {
		di "Calculating regression results for y = `y': _rc=`=_rc', redo_regressions=`redo_regressions'"
	}

	* Weights
	* (unweighted outcomes use the constant variable aw that Step 1 stored in the Lasso data)
	local aw `aw_`y''
	if missing("`aw'") {
		local aw aw
	}

	* Load the lasso data, adding in controls selected by Lasso
	use "`lasso_data'", clear
	if ("`aw'"=="aw") assert `aw'==1

	* Add controls selected by Lasso to predict y and d, for the IV/OLS samples
	* Each do-file is expected to create variables named v_dbl_y_# or v_dbl_d_#; they are
	* renamed to v_dbl_<effect>_y_# / v_dbl_<effect>_d_# right away so that the next do-file
	* can reuse the same generic names.
	foreach effect in IV OLS {
		foreach outvar in y d {
			* Generate Lasso-selected controls
			run "`lasso_do_`effect'_`outvar''"

			* If no controls were selected, generate a placeholder control equal to 0
			capture confirm variable v_dbl_`outvar'_1
			if _rc {
				gen v_dbl_`outvar'_1 = 0
			}

			* Rename variables to keep them unique
			foreach var of varlist v_dbl_`outvar'_* {
				rename `var' `=subinstr("`var'", "v_dbl_", "v_dbl_`effect'_", .)'
			}
		}
	}

	* Table settings
	* Outcomes whose name starts with spend or salary_ are shown with one decimal, all others with three
	local format format(%5.3f)
	if inlist(1, strpos("`y'", "spend"), strpos("`y'", "salary_")) local format format(%9.1f)
	local tbl_settings "`format' parentheses(stderr) asterisk(10 5 1)"
	local tbl_settings_mean "`format' parentheses(stderr)"
	* Tempfile named after the outcome: collects the regsave output of all specifications below
	tempfile `y'


	* ----------------------------------------------------------------------------------------
	* 0. Mean
	* ----------------------------------------------------------------------------------------
	* Regressing y on a constant regressor without intercept yields the (weighted) mean of y
	cap drop rhs
	gen rhs = 1
	qui _regress `y' rhs [aw=`aw'], noconst robust
	estimates save "`results_intermediate_files'/treat_effects/estimates/ITT_Mean_`y'", replace
	regsave rhs using ``y'', p nose table(itt_mean,`tbl_settings_mean') replace

	* ----------------------------------------------------------------------------------------
	* 1. ITT
	* ----------------------------------------------------------------------------------------
	* rhs is the treatment-assignment indicator; the Lasso specification uses the IV-sample controls
	local effect IV
	replace rhs = treat

	* (a) No Controls
	qui _regress `y' rhs [aw=`aw'], robust
	estimates save "`results_intermediate_files'/treat_effects/estimates/ITT_NoControl_`y'", replace
	regsave rhs using ``y'', p table(itt_no_controls,`tbl_settings') addlabel(wy_pval,"[]") append

	* (b) Strata FEs
	* NOTE(review): a(Strata) is passed to _regress here - presumably it absorbs the strata fixed effects; confirm
	qui _regress `y' rhs [aw=`aw'], a(Strata) robust
	estimates save "`results_intermediate_files'/treat_effects/estimates/ITT_StrataFE_`y'", replace
	regsave rhs using ``y'', p table(itt_strata_fe,`tbl_settings') addlabel(wy_pval,"[]") append

	* (c) Double-Lasso Controls
	* (only the controls selected to predict y are included in the ITT specification)
	qui _regress `y' rhs v_dbl_`effect'_y_* [aw=`aw'] if in_lasso_`effect', robust
	estimates save "`results_intermediate_files'/treat_effects/estimates/ITT_Lasso_`y'", replace
	regsave rhs using ``y'', p table(itt_dbl_lasso,`tbl_settings') addlabel(wy_pval,"[]") append

	* ----------------------------------------------------------------------------------------
	* 2. IV (z = treat)
	* ----------------------------------------------------------------------------------------
	* rhs is now the endogenous treatment variable d, instrumented by treatment assignment
	local effect IV
	local z treat
	replace rhs = `d'

	* (a) No Controls
	qui ivregress 2sls `y' (rhs = i.`z') [aw=`aw'], robust
	estimates save "`results_intermediate_files'/treat_effects/estimates/IV_z`z'_NoControl_`y'", replace
	regsave rhs using ``y'', p table(iv1_no_controls,`tbl_settings') addlabel(wy_pval,"[]") append

	* (b) Strata FEs
	qui ivregress 2sls `y' (rhs = i.`z') i.Strata [aw=`aw'], robust
	estimates save "`results_intermediate_files'/treat_effects/estimates/IV_z`z'_StrataFE_`y'", replace
	regsave rhs using ``y'', p table(iv1_strata_fe,`tbl_settings') addlabel(wy_pval,"[]") append

	* (c) Double-Lasso Controls
	* (controls selected to predict y AND controls selected to predict d)
	qui ivregress 2sls `y' (rhs = i.`z') v_dbl_`effect'_y_* v_dbl_`effect'_d_* [aw=`aw'] if in_lasso_`effect', robust
	estimates save "`results_intermediate_files'/treat_effects/estimates/IV_z`z'_Lasso_`y'", replace
	regsave rhs using ``y'', p table(iv1_dbl_lasso,`tbl_settings') addlabel(wy_pval,"[]") append

	* ----------------------------------------------------------------------------------------
	* 3. OLS (treatment group only)
	* ----------------------------------------------------------------------------------------
	* Same regressor as in the IV block, but without instrument and restricted to treat==1
	local effect OLS
	replace rhs = `d'

	* (a) No Controls
	qui _regress `y' rhs [aw=`aw'] if treat, robust
	estimates save "`results_intermediate_files'/treat_effects/estimates/OLS_NoControl_`y'", replace
	regsave rhs using ``y'', p table(ols_no_controls,`tbl_settings') addlabel(wy_pval,"[]") append

	* (b) Strata FEs
	qui _regress `y' rhs [aw=`aw'] if treat, a(Strata) robust
	estimates save "`results_intermediate_files'/treat_effects/estimates/OLS_StrataFE_`y'", replace
	regsave rhs using ``y'', p table(ols_strata_fe,`tbl_settings') addlabel(wy_pval,"[]") append

	* (c) Double-Lasso Controls
	qui _regress `y' rhs v_dbl_`effect'_y_* v_dbl_`effect'_d_* [aw=`aw'] if treat & in_lasso_`effect', robust
	estimates save "`results_intermediate_files'/treat_effects/estimates/OLS_Lasso_`y'", replace
	regsave rhs using ``y'', p table(ols_dbl_lasso,`tbl_settings') addlabel(wy_pval,"[]") append

	* Convert regsaved tempfile to permanent file
	preserve
	use ``y'', clear
	saveold "`regsave_file'", replace
	restore
}
}

* Step 3.  Westfall-Young corrected p-values
*   Time to compute with 10,000 bootstraps:
*     spend_vars (9 vars):    106 minutes
*     job_vars (14 vars):     152 minutes
*     health_vars (17 vars):  154 minutes
*     quantity_vars (9 vars): 129 minutes
*     TOTAL TIME (40 vars):   541 minutes (9.0 hours)
*   NOTE(review): the variable counts above do not match the outcome lists defined in
*   Step 0.c (8 / 17 / 17 / 8 outcomes), so the timings are presumably from an earlier version.
*
*   A "family" is one group of outcomes: domain x data source (admin / survey / index).
*   For every non-empty family, one data set of Westfall-Young p-values is saved per ITT
*   specification (no controls, strata FEs, post-Lasso).
foreach domain in spend_vars quantity_vars job_vars health_vars {
foreach group in `domain'_admin `domain'_survey `domain'_index {
if !missing("``group''") {
	* Notation
	*	y:   Outcome variable
	*	aw:  Regression weights - leave empty for unweighted regression (aw=1)

	* Treatment
	local rhs treat

	* Create a "wide" data set with all outcomes in the family, along with relevant Lasso controls
	* Load the lasso data, adding in controls selected by Lasso
	foreach y in ``group'' {
		* Input data and results files
		local lasso_data "`data_dir'/lasso/lasso_data_`y'.dta"
		local lasso_do_IV_y "`results_intermediate_files'/treat_effects/lasso/lasso_do_IV_y_`y'.do"

		* Load data
		use "`lasso_data'", clear

		* Generate controls selected by Lasso to predict y
		run "`lasso_do_IV_y'"

		* If no controls were selected, generate a placeholder control equal to 0
		capture confirm variable v_dbl_y_1
		if _rc {
			gen v_dbl_y_1 = 0
		}

		* Rename variables to keep them unique
		* (controls become v_<outcome>_y_#, the sample flag becomes in_<outcome>)
		foreach var of varlist v_dbl_y_* {
			rename `var' `=subinstr("`var'", "v_dbl_", "v_`y'_", .)'
		}
		rename in_lasso_IV in_`y'

		* Keep variables for use in the post-Lasso regression
		keep AnalysisID Strata `y' `rhs' v_`y'_y_* `aw_`y'' in_`y'

		tempfile wy_`y'
		qui save `wy_`y''
	}
	* Combine the per-outcome files: the first outcome is loaded (the remaining token list still
	* equals the full family list), every later outcome is merged 1:1 on AnalysisID.
	* assert(match) requires that all files contain exactly the same individuals.
	tokenize ``group''
	while !missing("`*'") {
		if "`*'"=="``group''" use `wy_`1'', clear
		else merge 1:1 AnalysisID using `wy_`1'', assert(match) nogen noreport
		macro shift
	}

	* Models
	* For each specification, build a list of quoted regression commands, one per outcome
	local family_models_itt_no_controls
	local family_models_itt_strata_fe
	local family_models_itt_dbl_lasso
	foreach y in ``group'' {
		* Weights
		* (here aw holds the complete weight clause, or nothing for unweighted outcomes)
		if !missing("`aw_`y''") local aw [aw=`aw_`y'']
		else                    local aw

		* (a) No Controls
		local model _regress `y' `rhs' `aw', robust
		local family_models_itt_no_controls `"`family_models_itt_no_controls' "`model'""'

		* (b) Strata FEs
		local model _regress `y' `rhs' `aw', a(Strata) robust
		local family_models_itt_strata_fe `"`family_models_itt_strata_fe' "`model'""'

		* (c) Double-Lasso Controls
		local model _regress `y' `rhs' v_`y'_y_* `aw' if in_`y', robust
		local family_models_itt_dbl_lasso `"`family_models_itt_dbl_lasso' "`model'""'
	}

	* Westfall-Young
	* Recomputed only when the results file is missing or the redo_wyoung flag is set.
	* wyoung (user-written) is called with the replace option and its output is listed and
	* saved afterwards, hence the preserve/restore around it to keep the wide data set.
	local nboot 10000
	qui foreach model_spec in itt_no_controls itt_strata_fe itt_dbl_lasso {
		capture confirm file "`results_intermediate_files'/treat_effects/wyoung/wy_`group'_`model_spec'.dta"
		if _rc | `redo_wyoung'==1 {
			noisily {
				di ""
				di "Calculating Westfall-Young p-values"
				di "Group: `group'"
				di "Model spec: `model_spec'"
			}
			timer on 1
			preserve
			wyoung, familyp(`rhs') bootstraps(`nboot') seed(11) cmd(`family_models_`model_spec'') detail replace
			noisily list outcome coef stderr p pwyoung, sep(0)
			saveold "`results_intermediate_files'/treat_effects/wyoung/wy_`group'_`model_spec'.dta", replace
			timer off 1
			timer list 1
			noisily di "Time to complete: " %8.2f `=`r(t1)'/60' " minutes, `nboot' bootstraps"
			timer clear 1
			restore
		}
	}
}
}
}

* Step 4.  MASTER table of treatment effects: all outcomes and specifications
*   Stacks the per-outcome tables from Step 2, adds one header row per domain, fills in the
*   Westfall-Young p-values from Step 3, and saves the result as treat_effects_all (.dta and .txt).
*   ingap, sortobs and cleanvars are user-written helpers; their exact behavior is not visible here.
if 1 {
	* Append all estimates into a single table
	clear
	gen outcome = ""
	local spend_vars_table `spend_vars'
	local quantity_vars_table `quantity_vars'
	local job_vars_table `job_vars'
	local health_vars_table `health_vars'
	local spend_vars_table_label     "+ Medical Spending----------------------------"
	local quantity_vars_table_label  "+ Medical Utilization (Quantity)--------------"
	local job_vars_table_label       "+ Employment and Productivity-----------------"
	local health_vars_table_label    "+ Health Status and Behaviors-----------------"
	local prev_domain
	foreach domain in spend_vars quantity_vars job_vars health_vars {
		if !missing("``domain'_table'") {
			foreach y in ``domain'_table' {
				append using "`results_intermediate_files'/treat_effects/estimates/all_estimates_`y'.dta"
				ingap -6
				* Tag the rows just added with the outcome name, and make the row names
				* outcome-specific so the Westfall-Young merge below can key on them
				replace outcome = "`y'" if missing(outcome) & (_n>_N-6)
				replace var = "`y'" if var=="rhs_coef"
				replace var = "wy_pval_`y'" if var=="wy_pval"
				sortobs, values(`=_N') before(`=_N-2')
			}
			* Label the first row of this domain's block.  The row number is the previous
			* domain's last row + 1; for the first domain prev_domain is still empty, the
			* row-count macro does not exist, and the expression evaluates to 1.
			replace var = "``domain'_table_label'" in `=``prev_domain'_table_nrow'+1'
			ingap `=``prev_domain'_table_nrow'+1'
			local `domain'_table_nrow = _N
			local prev_domain `domain'
		}
	}

	* Merge in Westfall-Young p-values
	* Builds one table with a row per outcome (keyed wy_pval_<outcome>) and a column per ITT
	* specification, accumulating family by family in a tempfile.
	preserve
	tempfile wyoung_table
	foreach domain in spend_vars quantity_vars job_vars health_vars {
		foreach group in `domain'_admin `domain'_survey `domain'_index {
			if !missing("``group''") {
				foreach model_spec in itt_no_controls itt_strata_fe itt_dbl_lasso {
					use outcome pwyoung using "`results_intermediate_files'/treat_effects/wyoung/wy_`group'_`model_spec'.dta", clear

					* Format and rename variables for merging into table of results
					replace outcome = "wy_pval_" + outcome
					tostring pwyoung, replace force format(%5.3f)
					replace pwyoung = "[" + pwyoung + "]"
					rename outcome var
					rename pwyoung `model_spec'

					* Save as tempfile
					* (the tempfile does not exist on the first pass, so there is nothing to merge yet)
					capture confirm file `wyoung_table'
					if !_rc merge 1:1 var using `wyoung_table', nogen noreport
					save `wyoung_table', replace
				}
			}
		}
	}
	restore
	* update replace: the bracketed p-values overwrite the "[]" placeholders stored in Step 2;
	* sortorder restores the row order, which merge does not preserve
	gen sortorder = _n
	merge m:1 var using `wyoung_table', nogen noreport update replace
	sort sortorder
	drop sortorder

  * Add variable time horizon: months from 07/2016 (the reference month hard-coded below)
  * to the end date encoded in the last four characters of the outcome name.
  * end_date is filled only on the first row of each outcome block (previous row has no outcome).
  gen time_horizon = ""
  order outcome var time_horizon
  gen end_date = substr(outcome, -4, 4) if !missing(outcome) & missing(outcome[_n-1])
  assert inlist(end_date, "0117", "0717", "0816", "0916", "1016", "2017", "_yr1", "")
  * MMYY end dates are converted to month differences; names ending in 2017 or _yr1 are set to 12
  replace time_horizon = string(mofd(date(end_date, "M20Y")) - mofd(date("0716", "M20Y")))  if !inlist(end_date, "2017", "")
  replace time_horizon = "12"                                                               if inlist(end_date, "2017", "_yr1") & !strpos(outcome, "marathon")
  replace time_horizon = "12"                                                               if inlist(end_date, "2017") & strpos(outcome, "marathon")
	drop end_date

	* Keep specified regression output stats, and clean row names
	drop if inlist(var, "r2")
	replace var = "pval" if strpos(var, "rhs_pval")
	replace var = "stderr" if strpos(var, "rhs_stderr")
	replace var = "wy_pval" if strpos(var, "wy_pval")
	replace var = "N_obs" if var=="N"
	cleanvars var

	label var outcome "Outcome Variable Name"
	label var var "Outcome Variable"
  label var time_horizon "Horizon (months)"
	label var itt_mean "Mean"
	label var itt_no_controls "No Controls"
	label var itt_strata_fe "Strata FEs"
	label var itt_dbl_lasso "Post-Lasso"
	label var iv1_no_controls "No Controls"
	label var iv1_strata_fe "Strata FEs"
	label var iv1_dbl_lasso "Post-Lasso"
	label var ols_no_controls "No Controls"
	label var ols_strata_fe "Strata FEs"
	label var ols_dbl_lasso "Post-Lasso"

	* Save results table
	* (this local is also read by the LaTeX table step that follows)
	local treat_effects_all "`results_intermediate_files'/treat_effects/treat_effects_all"
	save "`treat_effects_all'", replace

	* Export results table as text file
	* One empty observation is added and sorted to the top (sortorder 0); it then receives
	* the variable labels as a header row, prefixed by the estimator for the estimate columns.
	use "`treat_effects_all'", clear
	gen sortorder = _n
	* NOTE: addlines is not referenced again within this step
	local addlines 1
	set obs `=_N+1'
	replace sortorder = _n-(_N) if _n>_N-1
	sort sortorder
	drop sortorder
	* browse
	desc, varlist
	foreach var in `r(varlist)' {
		local lab: variable label `var'
		if inlist("`var'", "var", "outcome", "time_horizon") replace `var' = "`lab'" if _n==1
		if strpos("`var'", "itt")==1 replace `var' = "[ITT] `lab'" if _n==1
		if strpos("`var'", "iv1")==1 replace `var' = "[IV1] `lab'" if _n==1
		if strpos("`var'", "ols")==1 replace `var' = "[OLS] `lab'" if _n==1
	}
	outfile using "`treat_effects_all'.txt", noquote replace wide rjs
}

* Step 5. Latex tables
if 1 {
	* List of ITT/IV/OLS tables to create
	local table_type_list ITT_main IV_OLS_main
	foreach domain in spend_vars quantity_vars job_vars health_vars {
		foreach class in admin survey index {
			if !missing("``domain'_`class''") {
				local table_type_list `table_type_list' ITT_A_`domain'_`class' IV_OLS_A_`domain'_`class'
			}
		}
	}

	* Make the tables
	foreach table_type in `table_type_list' {
		* For appendix tables, determine which group of variables goes in the table
		local group_pos = strpos("`table_type'","ITT_A_")+6
		if `group_pos'==6 local group_pos = strpos("`table_type'","IV_OLS_A_")+9
		if `group_pos'==9 local group
		else local group = substr("`table_type'", `group_pos', .)
		local class = substr("`group'", strrpos("`group'","_")+1, .)
		if !missing("`class'") local class = " [`class']"
		if "`class'" == " [index]" local class = " [admin/survey]"

		* Specify which outcomes, specifications, and statistics to report in the table
		* Table 3: ITT (Main)
		if "`table_type'"=="ITT_main" {
			local spend_vars_table `spend_vars_admin'

      local job_vars_admin_drop titleChange_0616_0717 salaryRaise_0616_0717
      local job_vars_admin_keep : list job_vars_admin - job_vars_admin_drop
			local job_vars_table `job_vars_admin_keep' mgmtsafety_0717 `job_vars_index'

      local health_vars_table `health_vars_admin' everscreen_0717
      local keep_vars `spend_vars_table' `job_vars_table' `health_vars_table'

			* Specifications
			local show_mean 1
			local show_itt 1
			local show_iv1 0
			local show_ols 0

			* Regression statistics
			local show_stderr 1
			local show_pval 0
			local show_wy_pval 1
			local show_N_obs 1
		}
		* Table 4: IV and OLS (Main)
		else if "`table_type'"=="IV_OLS_main" {
			local spend_vars_table `spend_vars_admin'

      local job_vars_admin_drop titleChange_0616_0717 salaryRaise_0616_0717
      local job_vars_admin_keep : list job_vars_admin - job_vars_admin_drop
			local job_vars_table `job_vars_admin_keep' mgmtsafety_0717 `job_vars_index'

      local health_vars_table `health_vars_admin' everscreen_0717
      local keep_vars `spend_vars_table' `job_vars_table' `health_vars_table'

			* Specifications
			local show_mean 0
			local show_itt 0
			local show_iv1 1
			local show_ols 1

			* Regression statistics
			local show_stderr 1
			local show_pval 0
			local show_wy_pval 0
			local show_N_obs 1
		}
		* Table A.1: ITT - by Group (Appendix)
		else if strpos("`table_type'", "ITT_A_") {
			local keep_vars ``group''

			* Specifications
			local show_mean 1
			local show_itt 1
			local show_iv1 0
			local show_ols 0

			* Regression statistics
			local show_stderr 1
			local show_pval 0
			local show_wy_pval 1
			local show_N_obs 1
		}
		* Table A.2: IV and OLS - by Group (Appendix)
		else if strpos("`table_type'", "IV_OLS_A_") {
			local keep_vars ``group''

			* Specifications
			local show_mean 0
			local show_itt 0
			local show_iv1 1
			local show_ols 1

			* Regression statistics
			local show_stderr 1
			local show_pval 0
			local show_wy_pval 0
			local show_N_obs 1
		}

		* Make list of outcomes to keep in the table
		clear
		local nobs = wordcount("`keep_vars'")
		set obs `=`nobs'+1'
		gen outcome = "buffer" if _n==1
		if `nobs'>0 {
			tokenize "`keep_vars'"
			forvalues i = 1/`nobs' {
				replace outcome = "`1'" if _n==`i'+1
				macro shift
			}
		}
		tempfile keeprows
		save `keeprows'

		* Load MASTER table of effects, keeping only selected outcomes, specifications, and stats
		use "`treat_effects_all'.dta", clear
    drop time_horizon
		replace outcome = "buffer" if missing(outcome)
		gen sortorder = _n
		merge m:1 outcome using "`keeprows'", assert(match master) keep(match) nogen noreport
		sort sortorder
		drop if outcome=="buffer" & outcome[_n-1]=="buffer" & missing(var)
		drop if outcome=="buffer" & !missing(var) & (outcome[_n+1]=="buffer" | missing(var[_n+1]))
		drop if outcome=="buffer" & (_n==_N)
		if `show_mean'==0 drop itt_mean
		if `show_itt'==0  drop itt_no_controls itt_strata_fe itt_dbl_lasso
		if `show_iv1'==0  drop iv1_no_controls iv1_strata_fe iv1_dbl_lasso
		if `show_ols'==0  drop ols_no_controls ols_strata_fe ols_dbl_lasso
    
    * Revised table format for main IV/OLS table
    if "`table_type'"=="IV_OLS_main" & `show_iv1'==1 drop iv1_strata_fe
    if "`table_type'"=="IV_OLS_main" & `show_ols'==1 drop ols_strata_fe
    
		cap replace itt_mean = "N="+itt_mean if var=="N_obs"
		foreach effect in itt iv1 ols {
			foreach model_spec in `effect'_no_controls `effect'_strata_fe `effect'_dbl_lasso {
				cap replace `model_spec' = "N="+`model_spec' if var=="N_obs"
			}
		}
		foreach stat in stderr pval wy_pval N_obs {
			if `show_`stat''==0 drop if var=="`stat'"
			cap replace var = "" if var=="`stat'"
		}
		drop outcome sortorder
		* browse

		* Create LaTeX table
		if "`table_type'"=="ITT_main" {
			local filename itt_`timeframe'
			local using using "`results_tables'/`filename'.tex"
			local texsave_settings "replace autonumber nofix"
			local marker marker("tab:`filename'")
			local title title(`"First-Year Treatment Effects (ITT)"')
			local fn footnote("Notes: Each row and column reports estimates from a separate regression, where observations include individuals in the control or treatment groups. The outcome in each regression is specified by the table row. The focal independent variable is an indicator for inclusion in the treatment group, and the control strategy is specified by the column. Post-Lasso controls include covariates selected by Lasso to predict the dependent variable. The set of potential predictors include baseline values of all available variables in the same family of outcomes, strata variables, and the baseline (2016) survey variables reported in Table~\ref{tab:balance_tests1}, as well as all two-way interactions between these predictors. Robust standard errors are reported in parentheses. A */**/*** indicates significance at the 10/5/1\% level using conventional inference, i.e., not adjusting for multiple outcomes. Family-wise \(p\)-values, reported in brackets, adjust for the number of outcome variables in each family. See Appendix Tables~\ref{tab:appendix_itt_spend_vars_admin_`timeframe'}-\ref{tab:appendix_itt_health_vars_survey_`timeframe'} for results for all outcomes, categorized by family.")
			local size = "size(scriptsize)"

			* Export as LaTeX table
			local hline1 = 2
			local hline2 = `hline1' + wordcount("`spend_vars_table'")*5 - 1
			local hline3 = `hline2' + 2
			local hline4 = `hline3' + wordcount("`job_vars_table'")*5 - 1
			local hline5 = `hline4' + 2
			local hlines hlines(`hline1' `hline3' `hline5')
			texsave `using', `texsave_settings' varlabels `marker' `hlines' headlines("\setlength{\tabcolsep}{6pt}") `title' `fn' `size'

			* Additional LaTeX table tweaks
			local texfile "`results_tables'/`filename'.tex"
			text_replace "`texfile'" "\BSaddlinespace[\BSbelowrulesep]" ""
			text_replace "`texfile'" "\BSmidrule &&&& \BStabularnewline" "\BSmidrule\BSaddlinespace[0.9ex]"
			text_replace "`texfile'" "\BSmidrule\W&&&& \BStabularnewline" "\BSmidrule\BSaddlinespace[0.9ex]"
			text_replace "`texfile'" "\BSbottomrule" "\BSbottomrule\BSaddlinespace[-1.7ex]"
			text_replace "`texfile'" "\W&&&& \BStabularnewline" "\W\BSaddlinespace[0.9ex]"
		}
		else if "`table_type'"=="IV_OLS_main" {
			local filename iv_ols_`timeframe'
			local using using "`results_tables'/`filename'.tex"
			local texsave_settings "replace autonumber nofix"
			local marker marker("tab:`filename'")
			local title title(`"First-Year Treatment Effects: Experimental versus Observational Estimates"')
			local fn footnote("Notes: Each row and column reports estimates from a separate regression. The outcome in each regression is specified by the table row, and the (endogenous) focal independent variable is an indicator for completing the screening and health risk assessments (HRA). For the IV specifications (columns (1)-(3)), the instrument is an indicator for inclusion in the treatment group, and observations include individuals in the control or treatment groups. For the OLS specifications (columns (4)-(6)), there is no instrument and observations are restricted to individuals in the treatment group. The control strategy is specified by the column. Post-Lasso controls include covariates selected by Lasso to predict either the dependent variable or the focal independent variable. The set of potential predictors include baseline values of all available variables in the same family of outcomes, strata variables, and the baseline (2016) survey variables reported in Table~\ref{tab:balance_tests1}, as well as all two-way interactions between these predictors. Robust standard errors are reported in parentheses. A */**/*** indicates significance at the 10/5/1\% level using conventional inference.")
			local size = "size(scriptsize)"

			* Export as LaTeX table
			local hline1 = 2
			local hline2 = `hline1' + wordcount("`spend_vars_table'")*4 - 1
			local hline3 = `hline2' + 2
			local hline4 = `hline3' + wordcount("`job_vars_table'")*4 - 1
			local hline5 = `hline4' + 2
			local hlines hlines(`hline1' `hline3' `hline5')
			texsave `using', `texsave_settings' varlabels `marker' `hlines' headlines("\setlength{\tabcolsep}{6pt}") `title' `fn' `size'

			* Additional LaTeX table tweaks
			local texfile "`results_tables'/`filename'.tex"
			local line1 &{(1)}&{(2)}&{(3)}&{(4)} \BStabularnewline
			local line2 \BSmidrule
			local line3 & \BSmulticolumn{2}{c}{Experimental (IV)} & \BSmulticolumn{2}{c}{Observational (OLS)} \BStabularnewline
			local line4 \BScmidrule(l){2-3} \BScmidrule(l){4-5}
			text_replace "`texfile'" "`line1'" "`line1' \n `line2' \n `line3' \n `line4'"
			text_replace "`texfile'" "\BSbottomrule \BSaddlinespace[\BSbelowrulesep]" "\BSbottomrule\BSaddlinespace[-1.5ex]"
		}
		else if strpos("`table_type'", "ITT_A_") {
			local filename appendix_itt_`group'_`timeframe'
			local using using "`results_tables'/`filename'.tex"
			local texsave_settings "replace autonumber nofix"
			local marker marker("tab:`filename'")
			local title title(`"First-Year Treatment Effects (ITT)"')
			local fn footnote("Notes: The outcomes in this table constitute a single family of outcomes for calculating family-wise \(p\)-values. Each row and column reports estimates from a separate regression, where observations include individuals in the control or treatment groups. The outcome in each regression is specified by the table row. The focal independent variable is an indicator for inclusion in the treatment group, and the control strategy is specified by the column. Post-Lasso controls include covariates selected by Lasso to predict the dependent variable. The set of potential predictors include baseline values of all available variables in the same family of outcomes, strata variables, and the baseline (2016) survey variables reported in Table~\ref{tab:balance_tests1}, as well as all two-way interactions between these predictors. Robust standard errors are reported in parentheses. A */**/*** indicates significance at the 10/5/1\% level using conventional inference, i.e., not adjusting for multiple outcomes. Family-wise \(p\)-values, reported in brackets, adjust for the number of outcome variables in the table.")
			if "`group'"!="spend_vars_admin" local fn footnote("Notes: The outcomes in this table constitute a single family of outcomes for calculating family-wise \(p\)-values. See notes to Appendix Table~\ref{tab:appendix_itt_spend_vars_admin_`timeframe'} for additional details.")
			local size = "size(scriptsize)"

			* Export as LaTeX table
			local hline1 = 2
			local hlines hlines(`hline1')
			texsave `using', `texsave_settings' varlabels `marker' `hlines' headlines("\setlength{\tabcolsep}{6pt}") `title' `fn' `size'

			* Additional LaTeX table tweaks
			local texfile "`results_tables'/`filename'.tex"
			local addspace "1.4ex"
			if "`group'"=="health_vars_survey" local addspace "1.2ex"
			text_replace "`texfile'" "\BSaddlinespace[\BSbelowrulesep]" ""
			text_replace "`texfile'" "\BSmidrule &&&& \BStabularnewline" "\BSmidrule\BSaddlinespace[1.5ex]"
			text_replace "`texfile'" "\BSmidrule\W&&&& \BStabularnewline" "\BSmidrule\BSaddlinespace[1.5ex]"
			text_replace "`texfile'" "\BSbottomrule" "\BSbottomrule\BSaddlinespace[-1.5ex]"
			text_replace "`texfile'" "\W&&&& \BStabularnewline" "\W\BSaddlinespace[`addspace']"
		}
		else if strpos("`table_type'", "IV_OLS_A_") {
			local filename appendix_iv_ols_`group'_`timeframe'
			local using using "`results_tables'/`filename'.tex"
			local texsave_settings "replace autonumber nofix"
			local marker marker("tab:`filename'")
			local title title(`"First-Year Treatment Effects: Experimental versus Observational Estimates"')
			local fn footnote("Notes: Each row and column reports estimates from a separate regression. The outcome in each regression is specified by the table row, and the (endogenous) focal independent variable is an indicator for completing the screening and HRA. For the IV specifications (columns (1)-(3)), the instrument is an indicator for inclusion in the treatment group, and observations include individuals in the control or treatment groups. For the OLS specifications (columns (4)-(6)), there is no instrument and observations are restricted to individuals in the treatment group. The control strategy is specified by the column. Post-Lasso controls include covariates selected by Lasso to predict either the dependent variable or the focal independent variable. The set of potential predictors include baseline values of all available variables in the same family of outcomes, strata variables, and the baseline (2016) survey variables reported in Table~\ref{tab:balance_tests1}, as well as all two-way interactions between these predictors. Robust standard errors are reported in parentheses. A */**/*** indicates significance at the 10/5/1\% level using conventional inference.")
			if "`group'"!="spend_vars_admin" local fn footnote("Notes: See notes to Appendix Table~\ref{tab:appendix_iv_ols_spend_vars_admin_`timeframe'}.")
			local size = "size(scriptsize)"

			* Export as LaTeX table
			local hline1 = 2
			local hlines hlines(`hline1')
			texsave `using', `texsave_settings' varlabels `marker' `hlines' headlines("\setlength{\tabcolsep}{6pt}") `title' `fn' `size'

			* Additional LaTeX table tweaks
			local texfile "`results_tables'/`filename'.tex"
			local line1 &{(1)}&{(2)}&{(3)}&{(4)}&{(5)}&{(6)} \BStabularnewline
			local line2 \BSmidrule
			local line3 & \BSmulticolumn{3}{c}{Experimental (IV)} & \BSmulticolumn{3}{c}{Observational (OLS)} \BStabularnewline
			local line4 \BScmidrule(l){2-4} \BScmidrule(l){5-7}
			text_replace "`texfile'" "`line1'" "`line1' \n `line2' \n `line3' \n `line4'"
			local addspace "1.5ex"
			text_replace "`texfile'" "\BSaddlinespace[\BSbelowrulesep]" ""
			text_replace "`texfile'" "\BSmidrule &&&&&& \BStabularnewline" "\BSmidrule\BSaddlinespace[1.5ex]"
			text_replace "`texfile'" "\BSmidrule\W&&&&&& \BStabularnewline" "\BSmidrule\BSaddlinespace[1.5ex]"
			text_replace "`texfile'" "\BSbottomrule" "\BSbottomrule\BSaddlinespace[-1.5ex]"
			text_replace "`texfile'" "\W&&&& \BStabularnewline" "\W\BSaddlinespace[`addspace']"
		}

		* Make general replacements
		text_replace "`texfile'" "+ Medical Spending----------------------------&&&&" "\BSmulticolumn{5}{l}{\BStextbf{A. Medical Spending`class'}}"
		text_replace "`texfile'" "+ Employment and Productivity-----------------&&&&" "\BSmulticolumn{5}{l}{\BStextbf{B. Employment and Productivity`class'}}"
		text_replace "`texfile'" "+ Health Status and Behaviors-----------------&&&&" "\BSmulticolumn{5}{l}{\BStextbf{C. Health Status and Behaviors`class'}}"
		text_replace "`texfile'" "+ Medical Utilization (Quantity)--------------&&&&" "\BSmulticolumn{5}{l}{\BStextbf{D. Medical Utilization (Quantity)`class'}}"
		text_replace "`texfile'" "Drug spending [admin]" "\BS \BS \BS \BS Drug spending [admin]"
		text_replace "`texfile'" "Office spending [admin]" "\BS \BS \BS \BS Office spending [admin]"
		text_replace "`texfile'" "Hospital spending [admin]" "\BS \BS \BS \BS Hospital spending [admin]"
		text_replace "`texfile'" "Other spending [admin]" "\BS \BS \BS \BS Other spending [admin]"
		text_replace "`texfile'" "N=" "\BStextit{N=}"
	}

}
}

*---------------------------------------------------------------------------------------------------------------------------------------
*** ITT effects on average and nonzero spending, by treatment arm
*---------------------------------------------------------------------------------------------------------------------------------------
qui {
* Load claims
use "`wellness_analysis_dta'", clear

* Restrict to individuals enrolled in the study (treatment or control groups),
* then confirm that no "Not Randomized" observations remain
drop if inlist(StudyArm,"Not Randomized":StudyArm)
assert !inlist(StudyArm, "Not Randomized":StudyArm)

* Pooled treatment indicator: any study arm other than "Control"
gen byte T = !inlist(StudyArm, "Control":StudyArm)

* Indicators by arm letter (A, B, C), pooling the 25 and 75 variants of each letter
foreach arm in A B C {
	gen byte T_`arm' = inlist(StudyArm, "`arm'25":StudyArm, "`arm'75":StudyArm)
}

* Indicators by arm number (25, 75), pooling the A, B, and C variants of each number
foreach amt in 25 75 {
	gen byte T_`amt' = inlist(StudyArm, "A`amt'":StudyArm, "B`amt'":StudyArm, "C`amt'":StudyArm)
}

* One indicator per individual arm, created in the order A25 B25 C25 A75 B75 C75
foreach amt in 25 75 {
	foreach arm in A B C {
		gen byte T_`arm'`amt' = T_`arm'*T_`amt'
	}
}

* Note that T_A, T_B, T_C, T_25, and T_75 are collinear: the letter indicators
* and the number indicators sum to the same total for every observation
assert T_A + T_B + T_C == T_25 + T_75

* Medical Spending
if 1 {
preserve

* Post-period definitions
local period 0816_0717
tempfile spend_`period'
tempfile nonzero_spend_`period'

* Reduced form regressions
foreach outvar in spend_`period' nonzero_spend_`period' {

	* Defaults apply to the nonzero-spending indicator: no weights, three decimals.
	* The average monthly spending outcome is instead weighted by months of coverage
	* and reported with two decimals.
	local weights
	local tbl_settings "format(%11.3fc) parentheses(stderr) asterisk(10 5 1) order(T T_A T_B T_C T_75)"
	if !strpos("`outvar'", "nonzero") {
		local weights [aw = covg_`period']
		local tbl_settings "format(%11.2fc) parentheses(stderr) asterisk(10 5 1) order(T T_A T_B T_C T_75)"
	}
	di "`weights'"

	* Columns t1-t6: three sets of treatment indicators, each estimated first without
	* and then with strata fixed effects. The first column replaces the results file
	* for this outcome and the remaining columns are appended to it.
	local col 0
	local mode replace
	foreach rhs in "T" "T_A T_B T_C T_75" "T_A25 T_B25 T_C25 T_A75 T_B75 T_C75" {
		foreach strata in No Yes {
			local ++col

			if "`strata'"=="No" {
				qui _regress `outvar' `rhs' `weights', robust
			}
			else {
				qui _regress `outvar' `rhs' `weights', a(Strata) robust
			}

			* p-value of the regression F statistic, stored in the prob_f row
			local F = `=Ftail(e(df_m), e(df_r), e(F))'
			regsave using ``outvar'', cmdline t p addlabel(Strata,"`strata'",prob_f,`F') table(t`col',`tbl_settings') `mode'

			local mode append
		}
	}
}

* Table 3: post-period causal effect of wellness on total health care spending
use `spend_`period'', clear
drop if strpos(var,"tstat")
drop if strpos(var,"pval")
drop if var=="r2"
replace var = "" if strpos(var,"stderr")
replace var = subinstr(var,"_coef","",.)
replace var = "Treatment Group (any)" if var == "T"
replace var = "Group A* (A25, A75)" if var == "T_A"
replace var = "Group B* (B25, B75)" if var == "T_B"
replace var = "Group C* (C25, C75)" if var == "T_C"
replace var = "Group *75 (A75, B75, C75)" if var == "T_75"
foreach t in A25 B25 C25 A75 B75 C75 {
	replace var = "Group `t'" if var == "T_`t'"
}
replace var = "Constant" if var=="_cons"
replace var = "\$N$" if var== "N"
replace var = "Strata FE" if var=="Strata"
replace var = "F Test" if var=="prob_f"
list, sep(0)

local filename appendix_itt_studyarm_spending_`timeframe'
local using using "`results_tables'/`filename'.tex"
local marker marker(tab:`filename')
local title title(`"First-Year Treatment Effects (ITT) by Treatment Group: Total Health Care Spending"')
local fn footnote("Notes: Each column reports estimates from a separate regression estimated over individuals in the treatment and control groups in the claims sample. The outcome in each regression is average monthly health care spending over the first 12 months of the wellness program (August 2016--July 2017), and regressions are weighted by the number of months of coverage. The independent variables are indicators for inclusion in the specified treatment groups. Regressions reported in columns (2), (4), and (6) are the same as those reported in columns (1), (3), and (5) respectively, but with the addition of strata fixed effects. Robust standard errors are reported in parentheses. A */**/*** indicates significance at the 10/5/1\% level using conventional inference.")
local size = "size(footnotesize)"

* Export as LaTeX table
texsave `using', `texsave_settings' nonames `marker' hlines(-3) `title' `fn' `size'

* Table 4: post-period causal effect of wellness on probability of non-zero health care spending
use `nonzero_spend_`period'', clear
drop if strpos(var,"tstat")
drop if strpos(var,"pval")
drop if var=="r2"
replace var = "" if strpos(var,"stderr")
replace var = subinstr(var,"_coef","",.)
replace var = "Treatment Group (any)" if var == "T"
replace var = "Group A* (A25, A75)" if var == "T_A"
replace var = "Group B* (B25, B75)" if var == "T_B"
replace var = "Group C* (C25, C75)" if var == "T_C"
replace var = "Group *75 (A75, B75, C75)" if var == "T_75"
foreach t in A25 B25 C25 A75 B75 C75 {
	replace var = "Group `t'" if var == "T_`t'"
}
replace var = "Constant" if var=="_cons"
replace var = "\$N$" if var== "N"
replace var = "Strata FE" if var=="Strata"
replace var = "F Test" if var=="prob_f"
list, sep(0)

local filename appendix_itt_studyarm_anyspending_`timeframe'
local using using "`results_tables'/`filename'.tex"
local marker marker(tab:`filename')
local title title(`"First-Year Treatment Effects (ITT) by Treatment Group: Any Health Care Spending"')
local fn footnote("Notes: Each column reports estimates from a separate regression estimated over individuals in the treatment and control groups in the claims sample. The outcome in each regression is an indicator for positive health care spending over the first 12 months of the wellness program (August 2016--July 2017). The independent variables are indicators for inclusion in the specified treatment groups. Regressions reported in columns (2), (4), and (6) are the same as those reported in columns (1), (3), and (5) respectively, but with the addition of strata fixed effects. Robust standard errors are reported in parentheses. A */**/*** indicates significance at the 10/5/1\% level using conventional inference.")
local size = "size(footnotesize)"

* Export as LaTeX table
texsave `using', `texsave_settings' nonames `marker' hlines(-3) `title' `fn' `size'

restore
}

}


*---------------------------------------------------------------------------------------------------------------------------------------
*** ITT/IV spending effects, by degree of winsorization
*---------------------------------------------------------------------------------------------------------------------------------------
qui {
	* Load data for individuals in the treatment or control groups
	use "`data_dir'/lasso/lasso_data_spend_0816_0717.dta", clear

	* Bring in the winsorized spending variables. Every observation in memory must
	* find a match, and only matched observations are kept.
	merge 1:1 AnalysisID using "`wellness_analysis_dta'", assert(match using) keep(match) nogen keepusing(spendw*)
	assert _N==4834

	* Generate original lasso controls by running the two saved do-files (y and d variants)
	foreach eq in y d {
		run "`results_intermediate_files'/treat_effects/lasso/lasso_do_IV_`eq'_spend_0816_0717.do"
	}

	* Regression output settings
	local format format(%9.1f)
	local tbl_settings "`format' parentheses(stderr) asterisk(10 5 1)"

	* Regression weights
	local aw covg_0816_0717

	***************************************************************************
	* Estimate regressions with winsorized spending outcomes
	* (a) ITT No Controls
	* (b) ITT Post-Lasso Controls (original post-lasso controls)
	* (c) IV No Controls
	* (d) IV Post-Lasso Controls (original post-lasso controls)
	***************************************************************************
	foreach stub in ITT_NoControl_Winsorized ITT_Lasso_Winsorized IV_NoControl_Winsorized IV_Lasso_Winsorized {
		* The first result of each specification replaces its output files; later results are appended
		local append replace

		* Classify the specification: IV versus ITT, and with versus without lasso controls
		local is_iv    = substr("`stub'", 1, 3)=="IV_"
		local is_lasso = strpos("`stub'", "_Lasso_") > 0

		foreach q in 0 005 01 025 05 10 15 20 25 {
			* Specify outcome: the winsorized variable for this level, or raw spending when the level is zero
			local y spendw`q'_0816_0717
			if `q'==0 local y spend_0816_0717

			* Regression. The focal regressor is always stored as rhs so that the same
			* regsave call works for all four specifications.
			cap drop rhs
			if `is_iv' {
				gen rhs = hra_c_nomiss
				if `is_lasso' {
					qui ivregress 2sls `y' (rhs = i.treat) v_dbl_y_* v_dbl_d_* [aw=`aw'] if in_lasso_IV==1, robust
				}
				else {
					qui ivregress 2sls `y' (rhs = i.treat) [aw=`aw'], robust
				}
			}
			else {
				gen rhs = treat
				if `is_lasso' {
					qui regress `y' rhs  v_dbl_y_* [aw=`aw'] if in_lasso_IV==1, robust
				}
				else {
					qui regress `y' rhs [aw=`aw'], robust
				}
			}

			* Additional stats to save with regression output: the largest outcome value in
			* the estimation sample, and the winsorization level converted to a percentage
			* (the digits of q are read as a decimal fraction, so 005 becomes 0.5)
			qui sum `y' [aw=`aw'] if e(sample)
			local y_max = `r(max)'
			local win_q = `q'/(10^strlen("`q'"))*100
			local addlabel addlabel(win_q, `win_q', y_max, `y_max')

			* Save estimation output, numerically and as nicely formatted table
			estimates save "`results_intermediate_files'/winsorization/estimates/`stub'_`y'.ster", replace
			regsave rhs using "`results_intermediate_files'/winsorization/`stub'_num.dta", p `addlabel' cmdline ci `append'
			regsave rhs using "`results_intermediate_files'/winsorization/`stub'_tab.dta", p table(win_q`q',`tbl_settings') `addlabel' cmdline ci `append'

			local append append
		}
	}


	***************************************************************************
	* Figures of ITT result by degree of winsorization
	***************************************************************************
	if 1 {
		* Stack the numeric ITT results. The generated variable controls is 0 for the
		* no-control rows in memory and 1 for the appended lasso rows.
		use "`results_intermediate_files'/winsorization/ITT_NoControl_Winsorized_num.dta", clear
		append using "`results_intermediate_files'/winsorization/ITT_Lasso_Winsorized_num.dta", gen(controls)

		* Largest winsorization level shown, and the sample restriction for each series
		local win_q_max 25
		local if_nocontrol if controls==0 & win_q<=`win_q_max', sort
		local if_lasso     if controls==1 & win_q<=`win_q_max', sort

		* Point estimates (lines) and confidence intervals (shaded areas) for each series
		local coef_line_nocontrol line coef win_q `if_nocontrol' connect(l) lpattern(solid) lcolor(`dkorange') lwidth(*1.5) msymbol(none) mcolor(`dkorange') msize(*.65)
		local ci_area_nocontrol   rarea ci_lower ci_upper win_q `if_nocontrol' fcolor(`dkorange') lcolor(`dkorange') color(%30) lwidth(*.2)
		local coef_line_lasso     line coef win_q `if_lasso' connect(l) lpattern(dash) lcolor(black) lwidth(*1) msymbol(none) mcolor(black) msize(*.65)
		local ci_area_lasso       rarea ci_lower ci_upper win_q `if_lasso' fcolor(`ltblue') lcolor(`ltblue') color(%30) lwidth(*.2)

		* Axes, margins, and the zero reference line
		local x_axis xlabel(0(5)`win_q_max', grid glcolor(gs14) glwidth(vthin)) xtitle("Percent winsorization (top-coding)", bmargin(b=0))
		local y_axis ylabel(-100(50)100, grid glcolor(gs14) glwidth(vthin)) ytitle("Treatment effect (ITT)", bmargin(l=0) margin(l=0))
		local region plotregion(margin(l=0 r=0 t=0 b=0) col(white)) graphregion(margin(l=0 r=2 t=3 b=0) col(white))
		local lines yline(0, lcolor(`dkgray'))

		* ITT, No Controls + Lasso Controls (only)
		* Legend keys refer to plot order below: 1 lasso CI, 2 no-control CI, 3 lasso line, 4 no-control line
		local legend legend(on order(4 2 3 1) label(4 "ITT estimate (no controls)") label(2 "95% CI (no controls)") label(3 "ITT estimate (Lasso controls)") label(1 "95% CI (Lasso controls)") bmargin(r=0 l=0 b=0) rows(2))
		twoway (`ci_area_lasso') (`ci_area_nocontrol') (`coef_line_lasso') (`coef_line_nocontrol'), `x_axis' `y_axis' `legend' `region' `lines'
		graph export  "`results_figures'/ITT_spend_Winsorized_Controls_`timeframe'.pdf", as(pdf) replace
	}


	***************************************************************************
	* Tables of ITT/IV result by degree of winsorization
	***************************************************************************

	* Table: Lasso controls
	if 1 {
		* Winsorization levels to keep
		local win_keep win_q0 win_q005 win_q01 win_q025 win_q05

		* Start from an empty dataset; each specification is appended below under its own panel-header row
		clear
		gen specification = ""
		local ITT_Lasso_Winsorized_label "+ ITT Lasso Controls Winsorized----------------------------"
		local  IV_Lasso_Winsorized_label "+ IV Lasso Controls Winsorized-----------------------------"

		* Load regression results
		foreach stub in ITT_Lasso_Winsorized IV_Lasso_Winsorized {
			* Number of rows in this specification's results table
			desc using "`results_intermediate_files'/winsorization/`stub'_tab.dta"
			local N_obs = `r(N)'

			* Append the results, insert a gap row ahead of them, tag the appended rows with
			* the specification name, and turn the untagged gap row into the panel header
			append using "`results_intermediate_files'/winsorization/`stub'_tab.dta"
			ingap -`N_obs'
			replace specification = "`stub'" if missing(specification) & (_n>_N-`N_obs')
			replace var = "``stub'_label'" if missing(specification) & (_n==_N-`N_obs')

		}

		* Keep only selected winsorization levels
		keep specification var `win_keep'

		* Combine lower and upper confidence interval results into single cell.
		* The upper-bound row must be followed directly by the lower-bound row.
		replace var = "rhs_ci" if var=="rhs_ci_upper"
		assert var[_n+1]=="rhs_ci_lower" if var=="rhs_ci"
		foreach v of varlist win_q* {
			replace `v' = "[" + `v'[_n+1] + ", " + `v'[_n] + "]" if var=="rhs_ci"
		}
		drop if var=="rhs_ci_lower"

		* Keep specified regression output stats, and clean row names
		keep if inlist(var, "rhs_coef", "rhs_stderr", "rhs_ci", "N", "win_q") | missing(specification)
		replace var = "spend_0816_0717" if var=="rhs_coef"
		replace var = "" if inlist(var, "rhs_stderr", "rhs_ci")
		replace var = "N_obs" if var=="N"
		replace var = "Winsorization (percent)" if var=="win_q"
		drop specification
		cleanvars var

		* Create LaTeX table
		local filename winsorized_spending_controls_`timeframe'
		local using using "`results_tables'/`filename'.tex"
		local texsave_settings "replace autonumber nofix"
		local marker marker("tab:`filename'")
		local title title(`"First-Year Winsorized Medical Spending Treatment Effects"')
		local fn footnote("Notes: Each row and column reports estimates from a separate regression, where observations include individuals in the control or treatment groups. The outcome in each regression is winsorized (top-coded) average monthly medical spending over the first 12 months of the intervention, winsorized at the level indicated in each column. Regressions are weighted by the number of months of coverage. In Panel A (ITT), the focal independent variable is an indicator for inclusion in the treatment group, and all regressions include the same controls as the ITT post-Lasso specification reported in row 1 and column (3) of Table~\ref{tab:itt}. In Panel B (IV), the (endogenous) focal independent variable is an indicator for completing the screening and HRA, the instrument is an indicator for inclusion in the treatment group, and all regressions include the same controls as the IV post-Lasso specification reported in row 1 and column (2) of Table~\ref{tab:iv_ols_`timeframe'}. Column (1) replicates the (non-winsorized) ITT and IV post-Lasso results reported in Table~\ref{tab:itt} and Table~\ref{tab:iv_ols_`timeframe'}. Robust standard errors are reported in parentheses, and 95\% confidence intervals are reported in brackets. A */**/*** indicates significance at the 10/5/1\% level using conventional inference.")
		local size = "size(scriptsize)"

		* Export as LaTeX table. A spacer row is inserted at row 7, between the two panels,
		* and rules are placed after the listed rows.
		local hline1 = 1
		local hline2 = 4
		local hline3 = 6
		ingap 7
		local hline4 = 8
		local hline5 = 11
		local hlines hlines(`hline1' `hline2' `hline3' `hline4' `hline5')
		texsave `using', `texsave_settings' nonames `marker' `hlines' headlines("\setlength{\tabcolsep}{6pt}") `title' `fn' `size'

		* Additional LaTeX table tweaks.
		* This table has six columns (the row label plus five winsorization levels), so an
		* empty spacer row is written with five ampersands, the same count the panel-header
		* replacements below already use. The spacer patterns previously searched for four
		* ampersands, which is the layout of the five-column tables, and therefore never
		* matched anything in this file.
		local texfile "`results_tables'/`filename'.tex"
		text_replace "`texfile'" "\BSaddlinespace[\BSbelowrulesep]" ""
		text_replace "`texfile'" "\BSmidrule &&&&& \BStabularnewline" "\BSmidrule\BSaddlinespace[1.4ex]"
		text_replace "`texfile'" "\BSmidrule\W&&&&& \BStabularnewline" "\BSmidrule\BSaddlinespace[1.4ex]"
		text_replace "`texfile'" "\BSbottomrule" "\BSbottomrule\BSaddlinespace[-1.5ex]"
		text_replace "`texfile'" "\W&&&&& \BStabularnewline" "\W\BSaddlinespace[1.4ex]"

		* Make general replacements: panel headers spanning all six columns, and the N label
		text_replace "`texfile'" "+ ITT Lasso Controls Winsorized----------------------------&&&&&" "\BSmulticolumn{6}{l}{\BStextbf{A. ITT Estimates (Post-Lasso)}}"
		text_replace "`texfile'" "+ IV Lasso Controls Winsorized-----------------------------&&&&&" "\BSmulticolumn{6}{l}{\BStextbf{B. IV Estimates (Post-Lasso)}}"
		text_replace "`texfile'" "N_obs" "\BStextit{N}"
	}

	* Appendix Table: No controls and Lasso controls
	if 1 {
		* Winsorization levels to keep
		local win_keep win_q0 win_q005 win_q01 win_q025 win_q05

		* Start from an empty dataset; each specification is appended below under its own panel-header row
		clear
		gen specification = ""
		local ITT_NoControl_Winsorized_label "+ ITT No Control Winsorized----------------------------"
		local ITT_Lasso_Winsorized_label "+ ITT Lasso Controls Winsorized----------------------------"
		local  IV_NoControl_Winsorized_label "+ IV No Control Winsorized-----------------------------"
		local  IV_Lasso_Winsorized_label "+ IV Lasso Controls Winsorized-----------------------------"

		* Load regression results
		foreach stub in ITT_NoControl_Winsorized ITT_Lasso_Winsorized IV_NoControl_Winsorized IV_Lasso_Winsorized {
			* Number of rows in this specification's results table
			desc using "`results_intermediate_files'/winsorization/`stub'_tab.dta"
			local N_obs = `r(N)'

			* Append the results, insert a gap row ahead of them, tag the appended rows with
			* the specification name, and turn the untagged gap row into the panel header
			append using "`results_intermediate_files'/winsorization/`stub'_tab.dta"
			ingap -`N_obs'
			replace specification = "`stub'" if missing(specification) & (_n>_N-`N_obs')
			replace var = "``stub'_label'" if missing(specification) & (_n==_N-`N_obs')

		}

		* Keep only selected winsorization levels
		keep specification var `win_keep'

		* Combine lower and upper confidence interval results into single cell.
		* The upper-bound row must be followed directly by the lower-bound row.
		replace var = "rhs_ci" if var=="rhs_ci_upper"
		assert var[_n+1]=="rhs_ci_lower" if var=="rhs_ci"
		foreach v of varlist win_q* {
			replace `v' = "[" + `v'[_n+1] + ", " + `v'[_n] + "]" if var=="rhs_ci"
		}
		drop if var=="rhs_ci_lower"

		* Keep specified regression output stats, and clean row names
		keep if inlist(var, "rhs_coef", "rhs_stderr", "rhs_ci", "N", "win_q") | missing(specification)
		replace var = "spend_0816_0717" if var=="rhs_coef"
		replace var = "" if inlist(var, "rhs_stderr", "rhs_ci")
		replace var = "N_obs" if var=="N"
		replace var = "Winsorization (percent)" if var=="win_q"
		drop specification
		cleanvars var

		* Create LaTeX table
		local filename appendix_winsorized_spending_full_`timeframe'
		local using using "`results_tables'/`filename'.tex"
		local texsave_settings "replace autonumber nofix"
		local marker marker("tab:`filename'")
		local title title(`"First-Year Winsorized Medical Spending Treatment Effects"')
		local fn footnote("Notes: Each row and column reports estimates from a separate regression, where observations include individuals in the control or treatment groups. The outcome in each regression is winsorized (top-coded) average monthly health care spending over the first 12 months of the wellness program (August 2016--July 2017), winsorized at the level indicated in each column. Regressions are weighted by the number of months of coverage. In Panels A and B (ITT), the focal independent variable is an indicator for inclusion in the treatment group. The specifications reported in Panel A do not include controls, while those reported in Panel B include the same controls as the ITT post-Lasso specification reported in row 1 and column (3) of Table~\ref{tab:itt}. In Panels C and D (IV), the (endogenous) focal independent variable is an indicator for completing the screening and HRA and the instrument is an indicator for inclusion in the treatment group. The specifications reported in Panel C do not include controls, while those reported in Panel D include the same controls as the IV post-Lasso specification reported in row 1 and column (2) of Table~\ref{tab:iv_ols_`timeframe'}. There is no winsorization of the outcome in column (1), and thus the ITT and IV estimates are identical to the total spending effects of the corresponding No Controls and Post-Lasso specifications reported in Table~\ref{tab:itt} and Table~\ref{tab:iv_ols_`timeframe'}. Robust standard errors are reported in parentheses, and 95\% confidence intervals are reported in brackets. A */**/*** indicates significance at the 10/5/1\% level using conventional inference.")
		local size = "size(scriptsize)"

		* Export as LaTeX table. Spacer rows are inserted at rows 7, 14, and 21, between
		* consecutive panels, and rules are placed after the listed rows.
		local hline1 = 1
		local hline2 = 4
		local hline3 = 6
		ingap 7
		local hline4 = 8
		local hline5 = 11
		local hline6 = 13
		ingap 14
		local hline7 = 15
		local hline8 = 18
		local hline9 = 20
		ingap 21
		local hline10 = 22
		local hline11 = 25
		local hlines hlines(`hline1' `hline2' `hline3' `hline4' `hline5' `hline6' `hline7' `hline8' `hline9' `hline10' `hline11')
		texsave `using', `texsave_settings' nonames `marker' `hlines' headlines("\setlength{\tabcolsep}{6pt}") `title' `fn' `size'

		* Additional LaTeX table tweaks.
		* This table has six columns (the row label plus five winsorization levels), so an
		* empty spacer row is written with five ampersands, the same count the panel-header
		* replacements below already use. The spacer patterns previously searched for four
		* ampersands, which is the layout of the five-column tables, and therefore never
		* matched anything in this file.
		local texfile "`results_tables'/`filename'.tex"
		text_replace "`texfile'" "\BSaddlinespace[\BSbelowrulesep]" ""
		text_replace "`texfile'" "\BSmidrule &&&&& \BStabularnewline" "\BSmidrule\BSaddlinespace[1.4ex]"
		text_replace "`texfile'" "\BSmidrule\W&&&&& \BStabularnewline" "\BSmidrule\BSaddlinespace[1.4ex]"
		text_replace "`texfile'" "\BSbottomrule" "\BSbottomrule\BSaddlinespace[-1.5ex]"
		text_replace "`texfile'" "\W&&&&& \BStabularnewline" "\W\BSaddlinespace[1.4ex]"

		* Make general replacements: panel headers spanning all six columns, and the N label
		text_replace "`texfile'" "`ITT_NoControl_Winsorized_label'&&&&&" "\BSmulticolumn{6}{l}{\BStextbf{A. ITT Estimates (No Controls)}}"
		text_replace "`texfile'" "`ITT_Lasso_Winsorized_label'&&&&&" "\BSmulticolumn{6}{l}{\BStextbf{B. ITT Estimates (Post-Lasso)}}"
		text_replace "`texfile'" "`IV_NoControl_Winsorized_label'&&&&&" "\BSmulticolumn{6}{l}{\BStextbf{C. IV Estimates (No Controls)}}"
		text_replace "`texfile'" "`IV_Lasso_Winsorized_label'&&&&&" "\BSmulticolumn{6}{l}{\BStextbf{D. IV Estimates (Post-Lasso)}}"
		text_replace "`texfile'" "N_obs" "\BStextit{N}"
	}

}


*---------------------------------------------------------------------------------------------------------------------------------------
*** Identify whether treatment effects operate through screening or through wellness
*---------------------------------------------------------------------------------------------------------------------------------------
quietly {
	* Builds the appendix "mechanisms" table: IV regressions of each outcome on
	* screening/HRA completion (hra_c) and wellness-activity completion
	* (activity_c), instrumented with the study-arm indicators, run with and
	* without strata fixed effects, then exported to LaTeX with texsave.
	use "`wellness_analysis_dta'", clear

	* Keep only observations with a non-missing treatment assignment
	drop if mi(treat)

	* Control-group members cannot participate by construction, so set all
	* three participation measures to zero for them
	foreach part of varlist hra_c activity_f_c activity_s_c {
		replace `part' = 0 if treat==0
	}

	* Wellness participation = completed both the fall and the spring activity
	gen activity_c = (activity_f_c==1 & activity_s_c==1) if !missing(treat)
	label var activity_c "Completed a fall and spring wellness activity"

	* IV regressions. All combinations of monetary incentives (i.StudyArm) serve
	* as instruments. Specification 1 has no controls; specification 2 adds
	* strata fixed effects. The very first regsave call creates the results
	* file and every later call appends to it.
	tempfile results
	local savemode replace
	foreach v in everscreen_0717 mgmtsafety_0717 {
		forvalues spec = 1/2 {

			if `spec'==1 {
				local controls
				local strata_fe "No"
			}
			else {
				local controls i.Strata
				local strata_fe "Yes"
			}

			ivreg2 `v' (hra_c activity_c = i.StudyArm) `controls', robust

			* Results are stored in a column named <outcome><spec>; e(widstat)
			* from ivreg2 is recorded in the row labelled F
			regsave using "`results'", t p ///
				table("`v'`spec'", asterisk(10 5 1) format(%12.3fc)) ///
				addlabel(outcome,"`v'","Strata_FE","`strata_fe'",F,"`e(widstat)'") ///
				`savemode'
			local savemode append
		}
	}

	use "`results'", clear

	* Discard rows that are not displayed: strata dummies, t-statistics,
	* p-values, the constant, R-squared, and the outcome name
	foreach pattern in ".Strata" "_tstat" "_pval" "_cons" {
		drop if strpos(var,"`pattern'")
	}
	drop if inlist(var,"r2","outcome")

	* Wrap standard errors in parentheses and blank out their row labels
	foreach col of varlist ever* mgmt* {
		replace `col' = "("+`col'+")" if strpos(var,"_stderr")
	}
	replace var = "" if strpos(var,"stderr")
	replace var = subinstr(var,"_coef","",.)

	* Human-readable row labels
	replace var = "Completed Screening and HRA" if var=="hra_c"
	replace var = "Completed Fall and Spring Wellness Activities" if var=="activity_c"
	replace var = "Strata FE" if var=="Strata_FE"
	replace var = "First-stage F-statistic" if var=="F"
	replace var = "\(N\)" if var=="N"

	* Column headings: both specifications of an outcome share one heading
	forvalues spec = 1/2 {
		label var everscreen_0717`spec' "Ever screened"
		label var mgmtsafety_0717`spec' "Management priority on health/safety"
	}

	* Export the table
	local outname appendix_mechanisms_`timeframe'
	local notes "Notes: Each column reports estimates from a separate regression. The outcome variable is specified by the column heading. We instrument for both regressors using six indicators for inclusion in the six treatment groups. Robust standard errors are reported in parentheses. A */**/*** indicates significance at the 10/5/1\% level."
	local caption "First-Year IV Treatment Effects: Screening and Wellness Participation"
	texsave using "`results_tables'/`outname'.tex", varlabels marker("tab:`outname'") ///
		title("`caption'") headerlines("\midrule") footnote("`notes'") ///
		hlines(-3) replace autonumber nofix
}


** EOF
